Time series clustering partitions time series data into groups based on similarity or distance, so that time series in the same cluster are similar to each other.
Methodology followed:
# Project-local VRAE implementation and EMG helpers (notebook-style wildcard imports:
# load_data, recon, plot_recon_* and visualize presumably come from vrae.utils* — confirm).
from vrae.vrae import VRAE
from vrae.utils import *
from vrae.utils_EMG import *
import numpy as np
import torch
import pickle
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.manifold import TSNE
from sklearn.metrics import mean_squared_error as mse
import plotly
from torch.utils.data import DataLoader, TensorDataset
# Enable inline plotly rendering inside the notebook.
plotly.offline.init_notebook_mode()
# IPython magics (notebook-only syntax): auto-reload edited project modules.
%load_ext autoreload
%autoreload 2
The autoreload extension is already loaded. To reload it, use: %reload_ext autoreload
# Output directory for saved model weights and latent vectors.
dload = './model_dir'
# Model / training hyperparameters for the variational recurrent autoencoder.
seq_len = 10
hidden_size = 256
hidden_layer_depth = 3
latent_length = 16
batch_size = 32
learning_rate = 0.00002
n_epochs = 1500
dropout_rate = 0.0
optimizer = 'Adam' # options: ADAM, SGD
cuda = True # options: True, False
print_every=10
clip = True # options: True, False
max_grad_norm=5
loss = 'MSELoss' # options: SmoothL1Loss, MSELoss
block = 'LSTM' # options: LSTM, GRU
output = True
# EMG recording sessions used for training.
training_file = ['20201020_Pop_Cage_001','20201020_Pop_Cage_002','20201020_Pop_Cage_003','20201020_Pop_Cage_004',
'20201020_Pop_Cage_006']
# load_data is provided by the project utils (wildcard import). PCA is disabled here,
# so all raw channels are kept; per the logged output the result is windows of
# shape (N, seq_len, channels) = (17984, 10, 15) with labels (N, 1).
X_train, y_train = load_data(direc = 'data', dataset="EMG", all_file = training_file,
do_pca = False, single_channel = None,
batch_size = batch_size, seq_len = seq_len, pca_component = 6)
# Unlabeled dataset: only X is wrapped; labels are used later for visualization.
train_dataset = TensorDataset(torch.from_numpy(X_train))
Loading 20201020_Pop_Cage_001, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3.] Loading 20201020_Pop_Cage_002, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3.] Loading 20201020_Pop_Cage_003, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Loading 20201020_Pop_Cage_004, X shape (3601, 150, 1), y shape (3601, 1), has label [-1. 0. 1. 2. 3. 4.] Loading 20201020_Pop_Cage_006, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Dataset shape: (17984, 10, 15) Label: [-1. 0. 1. 2. 3. 4.], shape: (17984, 1)
# Number of EMG channels per timestep (15 per the logged dataset shape above).
num_features = X_train.shape[2]
VRAE inherits from sklearn.base.BaseEstimator and overrides the fit, transform and fit_transform functions, so it behaves like other scikit-learn estimators.
# NOTE(review): duplicate import — VRAE is already imported at the top of the file.
from vrae.vrae import VRAE
# Instantiate the variational recurrent autoencoder with the hyperparameters above.
vrae = VRAE(sequence_length=seq_len,
number_of_features = num_features,
hidden_size = hidden_size,
hidden_layer_depth = hidden_layer_depth,
latent_length = latent_length,
batch_size = batch_size,
learning_rate = learning_rate,
n_epochs = n_epochs,
dropout_rate = dropout_rate,
optimizer = optimizer,
cuda = cuda,
print_every=print_every,
clip=clip,
max_grad_norm=max_grad_norm,
loss = loss,
block = block,
dload = dload,
output = output)
/home/roton2/miniconda3/envs/emg/lib/python3.9/site-packages/torch/nn/_reduction.py:42: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.
# Train the VRAE on the EMG windows (a duplicated commented-out fit call was removed).
# The original note mentioned saving the learnt parameters during fitting —
# NOTE(review): confirm the project's fit(dataset, save=...) signature before relying on that.
vrae.fit(train_dataset)
Epoch: 9 Average loss: 5569730.0682 Epoch: 19 Average loss: 3732458.4848 Epoch: 29 Average loss: 2653692.3193 Epoch: 39 Average loss: 1980907.2225 Epoch: 49 Average loss: 1529497.8174 Epoch: 59 Average loss: 1236365.2767 Epoch: 69 Average loss: 1027439.0582 Epoch: 79 Average loss: 881177.5116 Epoch: 89 Average loss: 780301.0065 Epoch: 99 Average loss: 705758.5085 Epoch: 109 Average loss: 649304.0436 Epoch: 119 Average loss: 607651.4351 Epoch: 129 Average loss: 574628.2849 Epoch: 139 Average loss: 548216.1422 Epoch: 149 Average loss: 526444.9694 Epoch: 159 Average loss: 507821.9211 Epoch: 169 Average loss: 492410.0023 Epoch: 179 Average loss: 479200.3794 Epoch: 189 Average loss: 467370.3477 Epoch: 199 Average loss: 456903.0827 Epoch: 209 Average loss: 447540.3084 Epoch: 219 Average loss: 438803.0714 Epoch: 229 Average loss: 430883.0339 Epoch: 239 Average loss: 423330.4571 Epoch: 249 Average loss: 416496.0257 Epoch: 259 Average loss: 409704.3448 Epoch: 269 Average loss: 403419.8486 Epoch: 279 Average loss: 397517.4788 Epoch: 289 Average loss: 391475.4533 Epoch: 299 Average loss: 385871.1303 Epoch: 309 Average loss: 380375.7007 Epoch: 319 Average loss: 374983.4559 Epoch: 329 Average loss: 369932.0704 Epoch: 339 Average loss: 364966.0237 Epoch: 349 Average loss: 360056.2494 Epoch: 359 Average loss: 355519.9942 Epoch: 369 Average loss: 350842.7144 Epoch: 379 Average loss: 346408.2634 Epoch: 389 Average loss: 342173.7632 Epoch: 399 Average loss: 337990.9488 Epoch: 409 Average loss: 333848.3854 Epoch: 419 Average loss: 329842.6836 Epoch: 429 Average loss: 326039.3108 Epoch: 439 Average loss: 322077.3499 Epoch: 449 Average loss: 318477.5495 Epoch: 459 Average loss: 314846.8542 Epoch: 469 Average loss: 311418.6242 Epoch: 479 Average loss: 307992.1982 Epoch: 489 Average loss: 304656.6243 Epoch: 499 Average loss: 301413.6299 Epoch: 509 Average loss: 298277.2683 Epoch: 519 Average loss: 295275.0865 Epoch: 529 Average loss: 292186.0763 Epoch: 539 Average loss: 289326.4990 
Epoch: 549 Average loss: 286432.6661 Epoch: 559 Average loss: 283647.2205 Epoch: 569 Average loss: 280873.6833 Epoch: 579 Average loss: 278163.9420 Epoch: 589 Average loss: 275649.8202 Epoch: 599 Average loss: 272964.2555 Epoch: 609 Average loss: 270534.1769 Epoch: 619 Average loss: 268114.1000 Epoch: 629 Average loss: 265701.4245 Epoch: 639 Average loss: 263450.5183 Epoch: 649 Average loss: 261079.5857 Epoch: 659 Average loss: 258858.8500 Epoch: 669 Average loss: 256825.8093 Epoch: 679 Average loss: 254731.2724 Epoch: 689 Average loss: 252524.8937 Epoch: 699 Average loss: 250564.0954 Epoch: 709 Average loss: 248501.0327 Epoch: 719 Average loss: 246546.5805 Epoch: 729 Average loss: 244741.6537 Epoch: 739 Average loss: 242733.0368 Epoch: 749 Average loss: 240965.4992 Epoch: 759 Average loss: 239213.5026 Epoch: 769 Average loss: 237165.3642 Epoch: 779 Average loss: 235521.0486 Epoch: 789 Average loss: 233842.2583 Epoch: 799 Average loss: 232062.6950 Epoch: 809 Average loss: 230338.1972 Epoch: 819 Average loss: 228760.7855 Epoch: 829 Average loss: 227143.0640 Epoch: 839 Average loss: 225526.4552 Epoch: 849 Average loss: 223994.6093 Epoch: 859 Average loss: 222487.2021 Epoch: 869 Average loss: 220929.8764 Epoch: 879 Average loss: 219383.2342 Epoch: 889 Average loss: 218075.0535 Epoch: 899 Average loss: 216636.0686 Epoch: 909 Average loss: 215163.9137 Epoch: 919 Average loss: 213857.2248 Epoch: 929 Average loss: 212377.3878 Epoch: 939 Average loss: 211146.0805 Epoch: 949 Average loss: 209782.2900 Epoch: 959 Average loss: 208507.3163 Epoch: 969 Average loss: 207148.0224 Epoch: 979 Average loss: 205904.6757 Epoch: 989 Average loss: 204572.3600 Epoch: 999 Average loss: 203323.5779 Epoch: 1009 Average loss: 202147.3485 Epoch: 1019 Average loss: 200999.4990 Epoch: 1029 Average loss: 199830.1424 Epoch: 1039 Average loss: 198520.2240 Epoch: 1049 Average loss: 197475.5037 Epoch: 1059 Average loss: 196279.1025 Epoch: 1069 Average loss: 195121.9641 Epoch: 1079 Average loss: 
194039.7757 Epoch: 1089 Average loss: 192932.6470 Epoch: 1099 Average loss: 191751.9479 Epoch: 1109 Average loss: 190719.1743 Epoch: 1119 Average loss: 189734.6135 Epoch: 1129 Average loss: 188679.3766 Epoch: 1139 Average loss: 187659.9829 Epoch: 1149 Average loss: 186456.9057 Epoch: 1159 Average loss: 185512.5871 Epoch: 1169 Average loss: 184514.6119 Epoch: 1179 Average loss: 183648.0873 Epoch: 1189 Average loss: 182694.2264 Epoch: 1199 Average loss: 181860.4960 Epoch: 1209 Average loss: 180701.4282 Epoch: 1219 Average loss: 179730.5293 Epoch: 1229 Average loss: 178884.2931 Epoch: 1239 Average loss: 177906.7527 Epoch: 1249 Average loss: 177010.9744 Epoch: 1259 Average loss: 176087.3977 Epoch: 1269 Average loss: 175350.4403 Epoch: 1279 Average loss: 174398.1017 Epoch: 1289 Average loss: 173619.5733 Epoch: 1299 Average loss: 172749.2621 Epoch: 1309 Average loss: 171911.1607 Epoch: 1319 Average loss: 171069.8416 Epoch: 1329 Average loss: 170472.6583 Epoch: 1339 Average loss: 169407.6872 Epoch: 1349 Average loss: 168705.7183 Epoch: 1359 Average loss: 167784.7963 Epoch: 1369 Average loss: 166955.3322 Epoch: 1379 Average loss: 166330.6540 Epoch: 1389 Average loss: 165371.7354 Epoch: 1399 Average loss: 164763.5229 Epoch: 1409 Average loss: 163918.4827 Epoch: 1419 Average loss: 163305.5890 Epoch: 1429 Average loss: 162478.6742 Epoch: 1439 Average loss: 161700.0603 Epoch: 1449 Average loss: 160925.7743 Epoch: 1459 Average loss: 160382.4997 Epoch: 1469 Average loss: 159508.2322 Epoch: 1479 Average loss: 158916.9800 Epoch: 1489 Average loss: 158028.0224 Epoch: 1499 Average loss: 157468.7399
# Plot the per-epoch training loss. NOTE(review): `plt` is not imported explicitly
# here — presumably matplotlib.pyplot brought in by a wildcard import above; confirm.
plt.plot(vrae.all_loss)
[<matplotlib.lines.Line2D at 0x7f13d30d8580>]
# Plot vrae.rec_mse over training — presumably the reconstruction-MSE component
# of the loss; confirm against the VRAE implementation.
plt.plot(vrae.rec_mse)
[<matplotlib.lines.Line2D at 0x7f13d3504c10>]
#If the latent vectors have to be saved, pass the parameter `save`
# Encode the training windows into the 16-d latent space; save=True writes them
# to disk (presumably under `dload` — confirm vrae.transform's implementation).
z_run = vrae.transform(train_dataset, save = True, filename = 'z_run_e57_b32_z16_output.pkl')
z_run.shape
(17984, 16)
# Persist / restore the trained weights. NOTE(review): save() gets a bare filename
# while load() prepends dload — this matches a save() that joins self.dload
# internally; confirm, otherwise the two paths refer to different files.
vrae.save('./vrae_e57_b32_z16_output.pth')
vrae.load(dload+'/vrae_e57_b32_z16_output.pth')
# Reload the saved latent vectors from disk.
with open(dload+'/z_run_e57_b32_z16_output.pkl', 'rb') as fh:
z_run = pickle.load(fh)
# Reconstruct the training windows through the trained autoencoder and compare
# against the originals (recon / plot_recon_* come from the project utils).
reconstruction = recon(vrae, X_train)
plot_recon_feature(X_train, reconstruction, idx = None)
_, _, _ = plot_recon_metrics(X_train, reconstruction, x_lim = [2000, 4000])
Channel 1, corr = 0.7110, mse = 33.716799, mean = 29.5886. Channel 2, corr = 0.6771, mse = 29.756823, mean = 27.4895. Channel 3, corr = 0.6532, mse = 41.490045, mean = 31.6063. Channel 4, corr = 0.5767, mse = 27.503560, mean = 19.6259. Channel 5, corr = 0.6052, mse = 19.172670, mean = 13.4139. Channel 6, corr = 0.6542, mse = 36.484139, mean = 32.0427. Channel 7, corr = 0.8390, mse = 36.721060, mean = 49.2383. Channel 8, corr = 0.8267, mse = 45.209181, mean = 54.5515. Channel 9, corr = 0.6626, mse = 22.393672, mean = 21.3511. Channel 10, corr = 0.7023, mse = 36.437465, mean = 30.8874. Channel 11, corr = 0.8226, mse = 24.789751, mean = 46.5397. Channel 12, corr = 0.6438, mse = 31.463966, mean = 21.5676. Channel 13, corr = 0.8610, mse = 33.888693, mean = 50.0767. Channel 14, corr = 0.8048, mse = 34.835835, mean = 39.4550. Channel 15, corr = 0.7677, mse = 35.170939, mean = 36.4381.
# recon_channel = pca_inverse(X_pca, reconstruction)
# plot_recon_feature(X_train_ori, recon_channel, idx = None)
# _, _, _ = plot_recon_metrics(X_train_ori, recon_channel, x_lim = [0, 2000])
# Held-out sessions for evaluation (same recording day as the training files).
testing_file = ['20201020_Pop_Cage_005', '20201020_Pop_Cage_007']
# Load the test windows with the same preprocessing settings as training.
X_test, y_test = load_data(direc = 'data', dataset="EMG", all_file = testing_file,
do_pca = False, single_channel = None,
batch_size = batch_size, seq_len = seq_len, pca_component = 6)
Loading 20201020_Pop_Cage_005, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 5.] Loading 20201020_Pop_Cage_007, X shape (3599, 150, 1), y shape (3599, 1), has label [-1. 0. 1. 2. 3. 4.] Dataset shape: (7168, 10, 15) Label: [-1. 0. 1. 2. 3. 4. 5.], shape: (7168, 1)
# Uncomment if using pca
# Reconstruct the held-out test windows and report per-channel correlation / MSE.
recon_test = recon(vrae, X_test)
# recon_channel_test = pca_inverse(test_pca, recon_test)
plot_recon_feature(X_test, recon_test, idx = None)
# plot_recon_feature(X_test_ori, recon_channel_test, idx = None)
corr_mean, mse_mean, mean_mean = plot_recon_metrics(X_test, recon_test, x_lim = [0, 2000])
# corr_mean, mse_mean, mean_mean = plot_recon_metrics(X_test_ori, recon_channel_test, x_lim = [0, 2000])
Channel 1, corr = 0.6121, mse = 83.637991, mean = 29.5584. Channel 2, corr = 0.5813, mse = 66.471095, mean = 27.4149. Channel 3, corr = 0.5066, mse = 103.552847, mean = 32.5453. Channel 4, corr = 0.4664, mse = 45.278386, mean = 19.5454. Channel 5, corr = 0.4905, mse = 34.796398, mean = 13.0227. Channel 6, corr = 0.4526, mse = 195.371225, mean = 30.9896. Channel 7, corr = 0.7110, mse = 222.719560, mean = 50.5355. Channel 8, corr = 0.7134, mse = 208.347560, mean = 55.5833. Channel 9, corr = 0.5677, mse = 44.837757, mean = 20.8698. Channel 10, corr = 0.5782, mse = 114.562752, mean = 31.7638. Channel 11, corr = 0.6755, mse = 413.536354, mean = 46.6334. Channel 12, corr = 0.4841, mse = 95.617897, mean = 21.2069. Channel 13, corr = 0.7538, mse = 189.899586, mean = 52.4404. Channel 14, corr = 0.6541, mse = 186.663401, mean = 41.4571. Channel 15, corr = 0.6191, mse = 176.365483, mean = 38.8859.
# Dump the per-channel test metrics as plain Python lists (easy to copy/paste),
# in the same order as before: correlation, MSE, then channel mean.
for metric in (corr_mean, mse_mean, mean_mean):
    print(list(metric))
[0.6121488314468478, 0.5813439248928324, 0.5066102433964742, 0.4664310554762077, 0.4905492008347983, 0.45256908809767626, 0.710976250229428, 0.713378460854995, 0.5676987138026933, 0.5782117262560403, 0.6755104586927148, 0.4840726203855586, 0.7538127126198424, 0.6540542175087454, 0.619116153191103] [83.63799096488783, 66.47109541384013, 103.55284714716143, 45.27838585366898, 34.79639797395836, 195.37122479627513, 222.71955955419048, 208.34756018990512, 44.83775731175517, 114.56275163804216, 413.5363541639645, 95.61789735507624, 189.8995862884719, 186.66340084516858, 176.3654830970523] [29.558443014706768, 27.414924678304136, 32.54528668084857, 19.545427740051515, 13.022679916323572, 30.98961445901738, 50.535490176304165, 55.58334129172573, 20.86980557166033, 31.763762002375774, 46.63335753107305, 21.206949619877204, 52.44043212360176, 41.45713725737416, 38.88586835658078]
# Behavior-label lookup: behavior name -> integer label, stored as 1-element
# arrays to match the label arrays produced by load_data.
bhvs = {'crawling': np.array([0]),
        'high picking treats': np.array([1]),
        'low picking treats': np.array([2]),
        'pg': np.array([3]),
        'sitting still': np.array([4]),
        'grooming': np.array([5]),
        'no_behavior': np.array([-1])}
# Inverse lookup: integer label -> behavior name. Use .item() to extract the
# scalar: calling int() directly on a 1-element ndarray is deprecated in
# NumPy >= 1.25 and raises in NumPy 2.x.
inv_bhvs = {int(v.item()): k for k, v in bhvs.items()}
# Encode the test windows into the latent space (no saving this time).
test_dataset = TensorDataset(torch.from_numpy(X_test))
z_run_test = vrae.transform(test_dataset, save = False)
# Stack train and test latents / labels so they share one embedding plot.
z_run_all = np.vstack((z_run, z_run_test))
y_all = np.vstack((y_train, y_test))
# visualize comes from the project utils; one_in = 4 presumably subsamples every
# 4th point to declutter the plot — confirm against its implementation.
visualize(z_run = z_run_all, y = y_all, inv_bhvs = inv_bhvs, one_in = 4)